Package installation
Everything above might be unnecessary
##This is for the cases:
# url2 <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_nation_cases.rds"
# download.file(url2, "eval_cases.RDS") # download to disk
# scores <- readRDS(paste0(here(), "/eval_cases.RDS"))
#scores_total <- rbind(scores,scores2)
url1 <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_state_deaths.rds"
download.file(url1, "eval_deaths.RDS") # download to disk
scores2 <- readRDS(paste0(here(), "/eval_deaths.RDS"))
scores3 <- subset(scores2, forecaster %in% params$forecasters)
our_pred_dates <-
scores3 %>%
filter(forecaster == "CMU-TimeSeries")
our_pred_dates <- unique(our_pred_dates$forecast_date)
n_dates <- length(our_pred_dates)
# n_dates - 4 is often the most recent date with ground truth for 4 weeks ahead
# dates spaced out 2 weeks ahead to see different behaviors
forecast_dates <- our_pred_dates[n_dates- 2 *5:2]
scores3$forecast_date <-
if_else(scores3$forecaster %in% c("Karlen-pypm", "CU-select"), scores3$forecast_date + 1, scores3$forecast_date)
scores3 <- subset(scores3, forecast_date %in% forecast_dates)
results <- intersect_averagers(scores3, c("forecaster"), c("forecast_date", "geo_value"))
results %>%
group_by(forecast_date) %>%
summarise(n_distinct(geo_value))
## # A tibble: 3 x 2
## forecast_date `n_distinct(geo_value)`
## <date> <int>
## 1 2021-05-24 48
## 2 2021-06-07 48
## 3 2021-06-21 48
NOTE: Results are based on the following numbers of common locations
weeks.label = c("1 week ahead", "2 weeks ahead", "3 weeks ahead", "4 weeks ahead")
names(weeks.label) = c(1, 2, 3, 4)
subtitle = sprintf("Forecasts made over %s to %s",
format(min(forecast_dates), "%B %d, %Y"),
format(max(forecast_dates), "%B %d, %Y"))
plot_ae <-
plot_canonical(results,
x = "ahead",
y = "ae",
aggr = mean) +
labs(title = subtitle, x= "Weeks Ahead", y = "Mean AE",color='Forecasters') +
geom_point(aes(text=sprintf("Weeks Ahead: %s<br>Average Error: %s <br>Forecaster: %s",
ahead,
round(ae, digits=2),
color)),
alpha = 0.05) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_y_log10()
if (params$colorblind_palette) {
plot_ae <- plot_ae +
scale_color_viridis_d()
}
ggplotly(plot_ae, tooltip="text", width=1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
plot_wis <-
plot_canonical(results,
x = "forecast_date",
y = "wis",
aggr = mean,
grp_vars = c("forecaster","ahead"),
facet_rows = "ahead") +
labs(title = subtitle,
x = "Forecast Dates",
y = "Mean WIS",
color = "Forecasters") +
geom_point(aes(text=sprintf("Forecast Date: %s<br>Mean WIS: %s <br>Forecaster: %s",
forecast_date,
round(wis, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead=weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
scale_y_log10()
if (params$colorblind_palette) {
plot_wis <- plot_wis +
scale_color_viridis_d()
}
ggplotly(plot_wis, tooltip="text", height=800, width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
plot_cover <-
plot_canonical(results,
x = "forecast_date",
y = "value_80",
aggr = mean,
grp_vars = c("forecaster","ahead"),
facet_rows = "ahead") +
labs(title = subtitle, x= "Forecast date", y = "Mean Coverage 80", color='Forecasters') +
geom_point(aes(text = sprintf("Forecast Date: %s<br>Coverage: %s <br>Forecaster: %s",
forecast_date,
round(value_80, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = .8))
if (params$colorblind_palette) {
plot_cover <- plot_cover +
scale_color_viridis_d()
}
ggplotly(plot_cover, tooltip="text", height=800, width=1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
Relative to baseline; scale first then take the geometric mean, ignoring a few 0’s. I think this is potentially more useful than the median/mean for relative WIS (or relative AE), but I haven’t completely thought it through. Putting the results here to be provocative.
geom_mean <- function(x) prod(x)^(1/length(x))
#geom_mean <- exp(mean(log((x+1)/(y+1)))) #still need to figure this out
mean_wis <-
plot_canonical(results %>%
filter(wis > 0),
x = "ahead",
y = "wis",
aggr = geom_mean,
base_forecaster = "COVIDhub-baseline",
scale_before_aggr = TRUE) +
labs(title = subtitle,
x = "Weeks Ahead",
y = "Mean (geometric) relative WIS",
color = "Forecasters") +
geom_point(aes(text = sprintf("Weeks Ahead: %s<br>WIS: %s <br>Forecaster: %s",
ahead,
round(wis, digits = 2),
color)),
alpha = 0.05) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = 1))
if (params$colorblind_palette) {
mean_wis <- mean_wis +
scale_color_viridis_d()
}
ggplotly(mean_wis, tooltip="text", width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
mean_wis_forecast_date <-
plot_canonical(results %>%
filter(wis > 0),
x = "forecast_date",
y = "wis",
aggr = geom_mean,
facet_rows = "ahead",
grp_vars = c("forecaster", "ahead"),
base_forecaster = "COVIDhub-baseline",
scale_before_aggr = TRUE) +
theme(legend.position = "bottom") +
labs(title = subtitle,
x = "Forecast date",
y = "Mean (geometric) relative WIS",
color = "Forecasters") +
geom_point(aes(text = sprintf("Forecast Date: %s<br>WIS: %s <br>Forecaster: %s",
forecast_date,
round(wis, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = 1))
if (params$colorblind_palette) {
mean_wis_forecast_date <- mean_wis_forecast_date +
scale_color_viridis_d()
}
ggplotly(mean_wis_forecast_date, tooltip = "text", height=800, width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
Note that the results are scaled by population.
library(sf)
maps <- results %>%
group_by(geo_value, forecaster) %>%
summarise(across(wis:ae, mean)) %>%
left_join(animalia::state_population, by = "geo_value") %>%
mutate(across(wis:ae, ~ .x / population * 1e5)) %>%
pivot_longer(wis:ae, names_to = "score") %>%
group_by(score) %>%
mutate(time_value = Sys.Date(),
r = max(value)) %>%
group_by(forecaster, .add = TRUE) %>%
group_split()
# for county prediction, set geo_type = "county"
maps <- purrr::map(maps,
~as.covidcast_signal(
.x, signal = .x$score[1],
data_source = .x$forecaster[1],
geo_type = "state"))
maps <- purrr::map(maps,
~plot(.x,
choro_col = scales::viridis_pal()(3),
range = c(0,.x$r[1])))
nfcasts <- length(unique(results$forecaster))
# original code
cowplot::plot_grid(plotlist = maps[1:nfcasts], ncol = 3)
cowplot::plot_grid(plotlist = maps[(nfcasts+1):length(maps)], ncol = 3)